/*********** NOTES HEADER *********** 

DESCRIPTION: This program reduces the claims level information into quarterly information unique by beneficiary.
	It reshapes the provider-beneficiary-quarter information. It creates the firm-level information. 
	The program creates the vertical merger variable and the post-merger variable  

Datasets used:
	(1) vertmergers
	(2) ANALYSIS_V1_`yr'
	(3) 

Datasets created:
	(1) ANALYSIS_V2_`yr'
	(2) ANALYSIS_V3_`yr' 
	(3) ANAL_LIM_BENE_2005-2012
	(4) turn`i' and turn.dta
	(5) BENE-LEVELTURNED_FINAL_`i'
	(6) TAX_TURN_`i'
	(7) TAX_TURN_2005_2012
	(8) BENEQUARTER_FINAL_`i'
	
************************************/ 

set more off
capture log close
log using "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\4.Bene and Provider.log" , replace
clear all
global origData "N:\MedicareClaims-P045601-BE"
global dataIn "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-In\"
global dataOut "N:\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out"
global dpath "N:\MedicareClaims-P045601-BE\Work\ay_data"
global skapath "N:\MedicareClaims-P045601-BE\Work\ska"

adopath +  N:\SIL-Common\estout
adopath +  N:\SIL-Common\outreg2
adopath +  N:\SIL-Common\reghdfe-master\package


*BEGIN
***Setup the timing of the merger

tempfile vmerger_npi
tempfile vmerger_upin

use "$dataIn/vertmergers"
	keep  *ID Target Buyer date q_of_merger  
	capture gen npi_ID=ID
	gen upin_ID=npi_ID
	preserve
	duplicates drop
	sort npi_ID
save `vmerger_npi', replace
	sort npi_ID
  merge m:1 npi_ID using `vmerger_npi'
	drop if _merge==2
	drop _merge
  save `vmerger_npi', replace
	clear
	restore
	drop npi_ID
	duplicates drop
	sort upin_ID
  save `vmerger_upin', replace

merge m:1 upin_ID using `vmerger_upin'
	drop if _merge==2
	drop _merge
save `vmerger_upin', replace
clear

***End merger timing stuff


****BEGIN ANALYSIS CLEANING*******

*NOTE: Define the physician's location by the patient. 
*This may cause the physician to have multiple different locations in the dataset
*Assume that the patient sees the physician at the closest location to themselves

foreach yr of numlist 2005(1)2012 {
use "$dataOut/ANALYSIS_V1_`yr'.dta", replace
	drop _merge
	sort upin_ID
merge m:1 upin_ID using `vmerger_upin'
	drop _merge
	sort npi_ID

merge m:1 npi_ID using `vmerger_npi'

 gen PHYS_STATE = STATE_CD1_npi if STATE_CODE == STATE_CD1_npi
 gen PHYS_ZIP = ZIP1_npi if STATE_CODE == STATE_CD1_npi
 gen PHYS_SPCLTY = SPCLTY1_npi if STATE_CODE == STATE_CD1_npi

	foreach var of varlist STATE_CD* {
	  local j = subinstr("`var'","STATE_CD","",.)
	  local j = subinstr("`j'","_npi","",.)
	 di "`j'"
	replace PHYS_STATE = `var' if STATE_CODE == `var'
	replace PHYS_ZIP = ZIP`j'_npi if STATE_CODE == `var'
	replace PHYS_SPCLTY = SPCLTY`j'_npi if STATE_CODE == `var'
 drop `var' ZIP`j'_npi SPCLTY`j'_npi
 
 }

*THIS CREATES A PATIENT-LEVEL HEALTH OUTCOME VARIABLE THAT IS CONSISTENT IN TIME WITHIN A YEAR
*ALL CONDITIONS PROGRESS AND PATIENTS CAN'T RECOVER

*Main analysis conditions
*5,4,3 refers to the number of digits used to merge on the definition  
	gen hypertension = max(hypertension_5, hypertension_4,hypertension_3)
	gen malig_hyp = max(malignant_5, malignant_4,malignant_3)
	gen benign_hyp = max(benign_5, benign_4,benign_3)
	gen anychronic = max(chronic_4, chronic_3)

 drop *_5 *_4 *_3 icd3_num icdbin _merge
	gen age = round((date - BENE_BIRTH_DT)/365)
	gen DEAD = BENE_DEATH_DT<date  

 drop ami cataract chf ischemicheart glaucoma 

*Defining this from the member summary files. Won't use this in the final analysis
	gen ami = AMI_EVER<date
	gen cataract = CATARACT_EVER<date
	gen chf = CHF_EVER<date
	gen ischemicheart = ISCHEMICHEART_EVER<date
	gen glaucoma = GLAUCOMA_EVER<date
  
	sort BENE_ID date
		foreach var of varlist readmission hyper* malig_hyp benign_hyp diabetes icd_* diab_compl1 diab_compl2 *chronic *acute ami cataract chf DIABETES ischemicheart glaucoma STROKE_TIA {
			egen mx`var' = max(`var'), by(BENE_ID date)
			by BENE_ID: gen mx`var'_p = mx`var' if _n==1	
			by BENE_ID: replace mx`var'_p = max(mx`var',mx`var'_p[_n-1]) if _n>1
		drop mx`var'
	}
 
	 save "$dataOut/ANALYSIS_V2_`yr'.dta", replace
	 
	 clear

	 }


*MAKE CLAIMS-DATASET QUARTERLY  
	foreach yr of numlist 2006(1)2011 {
	 use "$dataOut/ANALYSIS_V2_`yr'.dta", replace
		gen quarter=mdy(quarter(date)*3,1,year)
		rename mxreadmission max_readmission
		format quarter %td
 
 collapse (sum) max_readmission (max) mx* DEAD (min) age , by(BENE* TAX* PHYS* quarter STATE_CODE NUM_NPI ORG_NPI_NUM NPI UPIN npi_ID upin_ID q_of_merger)
	 replace age=-1 if age==.
	  encode BENE_RACE_CD, gen(race_pat)
	  encode BENE_SEX , gen(sex_pat)
	  tostring BENE_ZIP_CD, replace format(%9.0f)
	 gen zip_pat=BENE_ZIP_CD

save "$dataOut/ANALYSIS_V3_`yr'.dta", replace

}

	use "$dataOut/ANALYSIS_V3_2005.dta" 
	 append using "$dataOut/ANALYSIS_V3_2006.dta" 
	 append using "$dataOut/ANALYSIS_V3_2007.dta" 
	 append using "$dataOut/ANALYSIS_V3_2008.dta" 
	 append using "$dataOut/ANALYSIS_V3_2009.dta" 
	 append using "$dataOut/ANALYSIS_V3_2010.dta" 
	 append using "$dataOut/ANALYSIS_V3_2011.dta" 
	 append using "$dataOut/ANALYSIS_V3_2012.dta" 

		sort BENE_ID quarter
		foreach var of varlist mx* {
			local i = subinstr("`var'","mx","",.)
			rename `var' `i'

			egen `var' = max(`i'), by(BENE_ID quarter)
				by BENE_ID: gen `var'_all = `var' if _n==1
				by BENE_ID: replace `var'_all = max(`var',`var'_all[_n-1]) if _n>1
			drop `var' `i'
		}

	save "$dataOut/ANAL_LIM_BENE_2005-2012.dta", replace

*Reshaping a DR-BENE-Quarter dataset to a BENE-Quarter dataset 
  forval i=2005(1)2012 { 
		use "$dataOut/ANAL_LIM_BENE_2005-2012.dta", replace
			drop if BENE_ID==""
			keep if year(quarter)==`i'
			rename TAX_NUM1 TAXNUM 
		keep BENE_ID quarter TAX* PHYS* npi_ID upin_ID q_of_merger 
		drop TAX_*
			gen vm_PROVIDER=npi_ID
			replace vm_PROVIDER=upin_ID if npi_ID==.
		drop npi_ID upin_ID
		label value vm_PROVIDER mergerid
		duplicates drop
		
		sort BENE_ID quarter TAXNUM
		 by BENE_ID quarter: gen cnt=_n 
		 
		 *This is the reshape command
			*this is a huge data issue
		 reshape wide TAX* PHYS* vm_PROVIDER q_of_merger , i(BENE_ID quarter) j(cnt)
		 tempfile turn
		sort BENE_ID quarter
	save "$dataOut/turn`i'.dta", replace
		clear
	}


*State, specialty, ZIP code
	use PHYS_STATE PHYS_SPCLTY PHYS_ZIP using "$dataOut/ANAL_LIM_BENE_2005-2012.dta"
		duplicates drop
		 keep if PHYS_STATE~=""
	 	 keep if PHYS_SPCLTY~=""
		levelsof PHYS_STATE, local(pstate)
		levelsof PHYS_SPCLTY, local(pspec)
	clear

	
*Turn.dta assumes the all doctors bill through one TIN and has the doctor information for all 76 doctors potentially seen for each patient-quarter 
	use "$dataOut/turn2005.dta"
		append using "$dataOut/turn2006.dta"
		append using "$dataOut/turn2007.dta"
		append using "$dataOut/turn2008.dta"
		append using "$dataOut/turn2009.dta"
		append using "$dataOut/turn2010.dta"
		append using "$dataOut/turn2011.dta"
		append using "$dataOut/turn2012.dta"

	save "$dataOut/turn.dta", replace

	use "$dataOut/turn.dta"
	*This gets the count of mergers for the observation
	 qui desc q_of_merger*, varlist
		local vlist `r(varlist)'
		local ct = wordcount("`vlist'")
		local thisvar: word `ct' of `vlist'
		local ctv = regexr("`thisvar'","[A-Za-z\_]+","")

*Assuming the earliest merger date
	gen mndate = q_of_merger1
	gen physcnt = 0
 
 *`ctv' is the number of physicians for a beneficiary  
 forval i=1(1)`ctv' {
 *Patients see many firms; sometimes multiple firms are involved in a merger at different dates
	replace mndate= q_of_merger`i' if q_of_merger`i'<mndate
	replace physcnt=`i' if PHYS_STATE`i'~=""
	replace TAXNUM`i' ="" if TAXNUM`i'=="000000000"
	replace TAXNUM`i' ="" if TAXNUM`i'=="999999999"
  }
 
 local cnt =0
 
 foreach i of numlist 1(1)`ctv' {
  local cnt = `cnt'+1
 
 *Patients see multiple specialties
 if `cnt' <= 1 {
   foreach var of local pspec {
  gen PSPEC`var' = PHYS_SPCLTY`i'=="`var'"
   }
  }
  if `cnt' > 1 {
   foreach var of local pspec {
 di "`cnt'"
 replace PSPEC`var' =1 if PHYS_SPCLTY`i' =="`var'"
    }
   }
  } 

  drop PHYS_SPCLTY*
  
 *Patients see physicians in multiple states
 local cnt =0
 foreach i of numlist 1(1)`ctv' {
  local cnt = `cnt'+1
  if `cnt' <= 1 {
   foreach var of local pstate {
  gen PSTATE`var' = PHYS_STATE`i'=="`var'"
   }
  }
  if `cnt' > 1 {
   foreach var of local pstate {
  di "`cnt'"
  replace PSTATE`var' =1 if PHYS_STATE`i' =="`var'"
    }
   }
  } 
  
  
	egen taxid = group(TAXNUM*) 
		format mndate %tq  
		drop TAXNUM* q_of_merger* PHYS_STATE*
	save "$dataOut/turn3.dta", replace
	 clear
 
 *Quarterly dataset with a little cleanup
	use "$dataOut/ANAL_LIM_BENE_2005-2012.dta", replace
		drop NPI UPIN NUM_NPI ORG_NPI_NUM TAX_NUM1 TAX_NUM* PHYS* npi_ID upin_ID q_of_merger patid
		 egen mxage=max(age), by(BENE_I quarter)
		 egen mxdead=max(DEAD), by(BENE_I quarter)
		drop if quarter ==.
		duplicates drop
	save "$dataOut/BENE_2005-2012.dta"

sort BENE_I quarter
merge 1:1 BENE_I quarter using  "$dataOut/turn3.dta"
drop _merge
 
 
************************************************************************

*THIS CODE CLEANS UP THE HEALTH VARIABLE TO ENSURE THAT THESE HEALTH CONDITIONS
*ARE PERPETUAL. ONCE ACQUIRED, THE PATIENT HAS THEM FOREVER

*THIS CODE ALSO CREATES THE POSTMERGER VARIABLE FROM THE TURNED DATASET

***********************************************************************

	sort BENE_I quarter
		foreach x of varlist mxhypertension_p_all mxmalig_hyp_p_all mxbenign_hyp_p_all mxdiabetes_p_all mxdiab_compl1_p_all mxdiab_compl2_p_all mxicd8_acute_p_all mxanychronic_p_all mxami_p_all  mxcataract_p_all  mxchf_p_all  mxDIABETES_p_all  mxischemicheart_p_all  mxglaucoma mxSTROKE_TIA_p_all mxdead_p {
			if regexm("`x'","^mx([a-z0-9]+_[a-z0-9]+).*") { 
			local var "`=regexs(1)'"
			di "`var'" 
		by BENE_ID : gen new`var' = (`x' == 1 & `x'[_n-1] == 0 )
			}
		} 

	egen docid=group(tin*), missing

	gen qtr=yq(year(quarter),quarter(quarter))

	
*MERGER DATE AND POST-MERGER IDENTIFIER	
	replace q_of_merger = mndate

	*VERTICAL MERGERS AND POSTMERGER VARIABLE
		foreach x of varlist q_of_merger {
			if regexm("`x'","^q_of_merger([0-9]+).*") { 
			local k "`=regexs(1)'"
			if `k' == 1 {
				gen postmerger = ( `x' < qtr & `x' != . )
				gen vmerger= `x' != . 
			}

		if `k' > 1 {
			di "high `k'"
			replace postmerger = 1 if `x' < qtr & `x' != . 
			replace vmerger=1 if `x' != . 
				}
			}
		}

	forval i=2005(1)2012 {
		preserve
		 keep if year(quarter) == `i' 
		 save "$dataOut/BENE-LEVELTURNED_FINAL_`i'.dta", replace
		restore
	}
 
	use BENE_ID quarter TAX* using "$dataOut/ANAL_LIM_BENE_2005-2012.dta" 
		duplicates drop
		drop if quarter==.
		bysort BENE_ID quarter: gen id = _n
		reshape long TAX_NUM, i(BENE_ID quarter id) j(tincount)

		drop id tincount
		drop if quarter==.
		drop if TAX==""
	duplicates drop

		sort BENE_ID quarter TAX
			by BENE_ID quarter : gen id =_n

		forval i=2005(1)2012 {
		 preserve
			keep if year(quarter)==`i'
			reshape wide TAX, i(BENE quarter) j(id)
				egen docid =group(TAX*)
		save "$dataOut/TAX_TURN_`i'.dta"
			restore
		}
		clear

*Merging on taxid (firm) information for the doctors		
	use "$dataOut/TAX_TURN_2005.dta"
		append using "$dataOut/TAX_TURN_2006.dta"
		append using "$dataOut/TAX_TURN_2007.dta"
		append using "$dataOut/TAX_TURN_2008.dta"
		append using "$dataOut/TAX_TURN_2009.dta"
		append using "$dataOut/TAX_TURN_2010.dta"
		append using "$dataOut/TAX_TURN_2011.dta"
		append using "$dataOut/TAX_TURN_2012.dta"

	capture drop docid

	foreach i of varlist TAX* {
		replace `i' ="0" if `i'==""
	}

	gen firmct =0
	local j = 0

	*counting the number of firms
	foreach i of varlist TAX* {
		local j = `j' +1
		replace firmct = `j' if `i' != "0"
	}

	egen docid = group(TAX*)
	drop TAX_NUM21-TAX_NUM`j'
	sort BENE_ID quarter 

	 save "$dataOut/TAX_TURN_2005_2012.dta", replace
	clear


forval i=2005(1)2012 {
	use "$dataOut/BENE-LEVELTURNED_FINAL_`i'.dta"
		capture drop _merge 
		capture drop max_readmission
	duplicates drop

	merge m:1 BENE_ID quarter using "$dataOut/TAX_TURN_2005_2012.dta" 
		keep if year(quarter) == `i' 

	save "$dataOut/BENEQUARTER_FINAL_`i'.dta", replace
	clear
}
